Exploratory Data Analysis

Code
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from matplotlib.colors import LinearSegmentedColormap
import warnings

# Default size for every matplotlib figure created afterwards: 10 x 6 (width, height)
plt.rcParams['figure.figsize'] = (10, 6)

# Hide FutureWarning messages so they do not clutter the cell output
warnings.filterwarnings('ignore', category=FutureWarning)

# Apply seaborn's "whitegrid" theme
# NOTE(review): this runs after the rcParams tweak above — confirm the installed
# seaborn version does not reset figure.figsize when the theme is applied
sns.set_theme(style="whitegrid")

# Load the cleaned incidents table; the path is relative to the working directory.
# Later cells read the 'latitude', 'longitude', 'country', 'year', 'total_affected'
# and (optionally) 'means_of_attack' columns from this frame.
df = pd.read_csv("data/security_incidents_cleaned.csv")
Code
# Create an interactive map of all incidents
def create_incidents_map(data):
    """Build a clustered folium map with one circle marker per geolocated incident.

    Markers are coloured by the incident's 'total_affected' value and open a
    popup showing the country, year, total affected and attack type.

    Args:
        data: DataFrame with 'latitude', 'longitude', 'country', 'year' and
            'total_affected' columns. 'means_of_attack' is optional; a missing
            column or a missing value is shown as 'Unknown'.

    Returns:
        folium.Map centred on the mean position of the plotted incidents, or
        on (0, 0) when no row has both coordinates.
    """
    # Function-scope import: popup values come straight from the CSV, so they
    # are escaped before being embedded in the popup's HTML.
    from html import escape

    def get_color(affected):
        """Map a total-affected count to a marker colour."""
        if pd.isna(affected) or affected == 0:
            return 'blue'
        if affected <= 5:
            return 'green'
        if affected <= 20:
            return 'orange'
        return 'red'

    # Only rows with both coordinates can be placed on the map
    valid_coords = data[data['latitude'].notna() & data['longitude'].notna()]

    # Centre on the points that are actually drawn. The mean of an empty
    # column is NaN, which is not a usable map location, so fall back to (0, 0).
    if valid_coords.empty:
        center = [0.0, 0.0]
    else:
        center = [valid_coords['latitude'].mean(), valid_coords['longitude'].mean()]

    incidents_map = folium.Map(location=center, zoom_start=2)

    # Add a marker cluster for better performance with many points
    marker_cluster = MarkerCluster().add_to(incidents_map)

    for _, row in valid_coords.iterrows():
        # The attack-type column is optional and may also hold missing values
        attack = row['means_of_attack'] if 'means_of_attack' in row else None
        attack_label = attack if pd.notna(attack) else 'Unknown'

        # Popup text with incident details; every value is HTML-escaped
        popup_text = f"""
        <b>Country:</b> {escape(str(row['country']))}<br>
        <b>Year:</b> {escape(str(row['year']))}<br>
        <b>Total Affected:</b> {escape(str(row['total_affected']))}<br>
        <b>Attack Type:</b> {escape(str(attack_label))}<br>
        """

        # Outline and fill share one colour, so compute it once per row
        marker_color = get_color(row['total_affected'])

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            popup=folium.Popup(popup_text, max_width=300),
            fill=True,
            fill_opacity=0.7,
            color=marker_color,
            fill_color=marker_color,
        ).add_to(marker_cluster)

    return incidents_map

# Build the clustered incident map from the full dataset
global_incidents_map = create_incidents_map(df)

# Write the map to a standalone HTML file (path is relative to the working directory)
# NOTE(review): nothing here creates the images/ directory — presumably it must
# already exist before save() is called; verify
map_filename = "images/global_security_incidents_map.html"
global_incidents_map.save(map_filename)

# A bare expression as the last line of a cell makes Jupyter render the map inline
global_incidents_map
Make this Notebook Trusted to load map: File -> Trust Notebook
Code
# Import necessary libraries for interactive plotting
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

# Expects df to provide 'year' and 'country' columns.
# Count incidents for every (year, country) combination; each distinct year
# becomes one animation frame of the choropleth below.
incidents_by_year_country = (
    df.groupby(['year', 'country'])
    .size()
    .reset_index(name='incidents')
)

# Per-year incident totals in chronological order
year_totals = (
    incidents_by_year_country
    .groupby('year')['incidents']
    .sum()
    .reset_index()
    .sort_values('year')
)

# Animated choropleth: countries are matched by name, and the colour range is
# pinned to the overall maximum so that frames share one scale
choropleth_options = {
    'locations': 'country',
    'locationmode': 'country names',
    'color': 'incidents',
    'animation_frame': 'year',
    'color_continuous_scale': 'Viridis',
    'range_color': [0, incidents_by_year_country['incidents'].max()],
    'height': 600,
}
fig = px.choropleth(incidents_by_year_country, **choropleth_options)

# Centred, enlarged title placed near the top of the figure
choropleth_title = dict(
    text='Security Incidents by Country Over Time',
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
    font=dict(size=24),
)

fig.update_layout(
    title=choropleth_title,
    coloraxis_colorbar={'title': 'Number of Incidents'},
    geo={
        'showframe': False,
        'showcoastlines': True,
        'projection_type': 'natural earth',
    },
    margin={'t': 100},  # Extra top margin leaves room under the title
)

def _year_total_annotation(year, total):
    """Return the subtitle annotation that reports one year's incident total."""
    return dict(
        x=0.5,
        y=0.87,  # Position subtitle lower than title
        xref='paper',
        yref='paper',
        text=f'Total Incidents in {year}: {total}',
        showarrow=False,
        font=dict(size=18),
    )


# Slow playback down to 1 s per frame with a 0.5 s transition.
# The play button only exists when animation controls were generated (more
# than one year of data), so guard the lookup instead of raising IndexError.
if fig.layout.updatemenus:
    fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
    fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

# Attach each year's total to its own frame. Frames are looked up by name
# (the string form of the year) rather than by position, so the subtitle can
# never be paired with the wrong frame or index past the last one.
totals_by_frame_name = {
    str(year): (year, total)
    for year, total in zip(year_totals['year'], year_totals['incidents'])
}
for frame in fig.frames:
    if frame.name in totals_by_frame_name:
        frame_year, frame_total = totals_by_frame_name[frame.name]
        frame.layout.annotations = [_year_total_annotation(frame_year, frame_total)]

# Also add the annotation to the base layout. The figure opens on its first
# frame (the earliest year), so the static subtitle must describe that year;
# labelling it with the latest year would contradict the map being shown.
if not year_totals.empty:
    initial_year = year_totals['year'].min()
    initial_total = year_totals.loc[year_totals['year'] == initial_year, 'incidents'].values[0]
    fig.update_layout(annotations=[_year_total_annotation(initial_year, initial_total)])

    # No longer used for the subtitle; kept so that any later cell reading
    # these names still finds them.
    latest_year = year_totals['year'].max()
    latest_total = year_totals.loc[year_totals['year'] == latest_year, 'incidents'].values[0]

# Export the animated choropleth as an HTML file for the Quarto output
# NOTE(review): nothing here creates the images/ directory — presumably it must
# already exist; verify
fig.write_html("images/interactive_incidents_over_time.html")

# Render the figure inline when the cell runs in a notebook
fig.show()

# Alternative view: incidents per year as a bar chart with a range slider.
# Years are stored as strings so the x axis treats them as discrete labels.
year_incidents = (
    df.groupby('year')
    .size()
    .reset_index(name='incidents')
    .assign(year=lambda counts: counts['year'].astype(str))
)

yearly_bar_title = 'Interactive Security Incidents by Year'

fig2 = px.bar(
    year_incidents,
    x='year',
    y='incidents',
    title=yearly_bar_title,
    labels={'incidents': 'Number of Incidents', 'year': 'Year'},
    height=500,
)

# Centre the title, attach the range slider and switch to the white template
fig2.update_layout(
    title=dict(
        text=yearly_bar_title,
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    xaxis={
        'rangeslider': {'visible': True},
        'type': 'category',  # Discrete years rather than a numeric axis
    },
    bargap=0.1,
    template='plotly_white',
)

# Write the chart to disk first, then render it inline
fig2.write_html("images/interactive_yearly_incidents_barchart.html")
fig2.show()

Code
import plotly.express as px
import pandas as pd

# Expects df to provide a 'country' column.

# CHART 1: Countries with most incidents over all time
# Incident count per country across every year, largest first
total_by_country = (
    df.groupby('country')
    .size()
    .reset_index(name='total_incidents')
    .sort_values('total_incidents', ascending=False)
)

# The 15 countries with the highest totals
top15_countries = total_by_country.head(15)

all_time_title = 'Top 15 Countries by Security Incidents (All Time)'

# Bars are coloured by their own height on the Viridis scale
fig_top_all_time = px.bar(
    top15_countries,
    x='country',
    y='total_incidents',
    title=all_time_title,
    labels={'total_incidents': 'Number of Incidents', 'country': 'Country'},
    color='total_incidents',
    color_continuous_scale='Viridis',
    height=600,
)

fig_top_all_time.update_layout(
    title=dict(
        text=all_time_title,
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(size=20),
    ),
    # Tallest bar first, with slanted country labels
    xaxis=dict(categoryorder='total descending', tickangle=45),
    # The colour bar would only repeat what the bar heights already show
    coloraxis_showscale=False,
)

# CHART 2: Countries with most incidents in the last 10 years
RECENT_WINDOW_YEARS = 10
current_year = df['year'].max()

# The window is inclusive at both ends, so the N most recent calendar years
# start at current_year - (N - 1). Subtracting N itself would keep N + 1 years
# while the title still claimed N.
recent_start_year = current_year - (RECENT_WINDOW_YEARS - 1)
# Original variable name, kept so that any later cell reading it still works;
# it now holds the first year of the inclusive window.
ten_years_ago = recent_start_year

# Filter data for the recent window
recent_df = df[df['year'] >= recent_start_year]

# Incident count per country inside the window, largest first
recent_by_country = recent_df.groupby('country').size().reset_index(name='recent_incidents')
recent_by_country = recent_by_country.sort_values('recent_incidents', ascending=False)

# Get top 15 countries in the recent window
top15_recent = recent_by_country.head(15)

# Built once so the px.bar title and the layout title cannot drift apart
recent_title = (
    f'Top 15 Countries by Security Incidents '
    f'(Last {RECENT_WINDOW_YEARS} Years: {recent_start_year}-{current_year})'
)

# Create bar chart for top countries in the recent window
fig_top_recent = px.bar(
    top15_recent,
    x='country',
    y='recent_incidents',
    title=recent_title,
    labels={'recent_incidents': 'Number of Incidents', 'country': 'Country'},
    color='recent_incidents',
    color_continuous_scale='Viridis',
    height=600
)

fig_top_recent.update_layout(
    title={
        'text': recent_title,
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 20}
    },
    xaxis={'categoryorder': 'total descending', 'tickangle': 45},  # Sort bars and angle labels
    coloraxis_showscale=False  # Hide the color scale
)

# Render both top-15 bar charts inline (all-time first, then the recent window)
fig_top_all_time.show()
fig_top_recent.show()

# Write each chart to its own HTML file (paths are relative to the working directory)
# NOTE(review): nothing here creates the images/ directory — presumably it must
# already exist; verify
fig_top_all_time.write_html("images/top_countries_all_time.html")
fig_top_recent.write_html("images/top_countries_recent.html")

# Confirmation message, printed after both write_html calls above
print("Bar chart visualizations saved as HTML files in the images directory.")
Bar chart visualizations saved as HTML files in the images directory.